import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import seaborn as sns
from copairs.map import average_precision
from copairs.map import mean_average_precision
import pycytominer
def BRD_ID(i):
    """Return the second dash-separated field of a sample identifier.

    For example ``'BRD-K12345-001-01-1'`` yields ``'K12345'``.

    Parameters
    ----------
    i : object
        Usually a string identifier. Missing values (``NaN``/``None``) and
        any other non-string input are tolerated.

    Returns
    -------
    str or None
        The text between the first and second ``'-'``, or ``None`` when the
        input is not a string or contains no ``'-'``.
    """
    # isinstance() instead of `type(i) != float`: the old check only skipped
    # plain floats, so None or numeric values crashed on `.split`.
    if not isinstance(i, str):
        return None
    parts = i.split('-')
    return parts[1] if len(parts) > 1 else None
# Annotation table loaded once when this cell/module runs. The path is relative
# to the current working directory and uses a Windows-style separator.
# cell_count_norm_colorscheme_early() merges it on 'BRD ID' and reads its
# 'MoA' and 'Common Name' columns.
moa_metadata = pd.read_csv('copairs_csv\\LC00009948_MoA_Common_Names.csv')
# A function to run copairs and plot the mAP graph (the CSV export lines inside it are currently commented out)
def _read_profile_csv(path):
    """Load a profile CSV, detecting gzip compression from the file content."""
    # Sniff the two-byte gzip magic number rather than trusting the extension;
    # the handle is closed before pandas opens the file itself.
    with open(path, 'rb') as handle:
        is_gzip = handle.read(2) == b'\x1f\x8b'
    return pd.read_csv(path, compression='gzip' if is_gzip else 'infer')


def copairs_batches_earlytimepoint(input_dict, mAP=''):
    """Run copairs on several profile files and plot their mAP distributions.

    Parameters
    ----------
    input_dict : dict
        Maps the path of a profile CSV (optionally gzip-compressed) to the
        label used for that dataset in the output and on the plot.
    mAP : str, optional
        ``'Control'`` makes negative pairs differ in ``Metadata_control_type``;
        any other value makes them differ in ``Metadata_broad_sample``.

    Returns
    -------
    tuple
        ``(plt, output_dict)`` where ``plt`` is the ``matplotlib.pyplot``
        module and ``output_dict`` maps each label to the DataFrame returned
        by ``mean_average_precision``.

    Raises
    ------
    ValueError
        If ``input_dict`` is empty (there would be nothing to plot).
    """
    if not input_dict:
        raise ValueError("input_dict must map at least one profile path to a dataset name")

    # Parameters for copairs.
    pert_col = "Metadata_broad_sample"
    control_col = "Metadata_control_type"
    pos_sameby = [pert_col]
    pos_diffby = []
    neg_sameby = []
    # neg_diffby depends on whether mAP is computed with respect to the
    # controls or to the other treatments.
    neg_diffby = [control_col] if mAP == 'Control' else [pert_col]
    batch_size = 20000
    null_size = 10000

    output_dict = {}
    for path, name in input_dict.items():
        df = _read_profile_csv(path)
        metadata_columns = [c for c in df.columns if 'Metadata' in c]
        feature_columns = [c for c in df.columns if 'Metadata' not in c]

        meta = df[metadata_columns].copy()
        # Rows with no control type are labelled 'trt'; rows with no sample id
        # are labelled 'control', so neither column contains NaN for pairing.
        meta[control_col] = meta[control_col].fillna('trt')
        meta[pert_col] = meta[pert_col].fillna('control')

        # Drop every feature column that contains a missing value.
        features = df[feature_columns].dropna(axis=1).values

        result = average_precision(meta, features, pos_sameby, pos_diffby, neg_sameby, neg_diffby, batch_size)
        # NOTE: CSV export is disabled. Later cells read files with these names,
        # so re-enable the two lines below if those files need regenerating.
        #result.to_csv(f"{path[:-4]}_Result_NegconNorm_mAP_wrt_{mAP}.csv")
        output_dict[name] = mean_average_precision(
            result, sameby=pos_sameby, null_size=null_size, threshold=0.05, seed=2
        )
        #output_dict[name].to_csv(f"{path[:-4]}_Aggregate_result_NegconNorm_mAP_wrt_{mAP}.csv")

    # One concat instead of growing a DataFrame inside the loop.
    combined_df = pd.concat(
        [scores.assign(dataset=name) for name, scores in output_dict.items()]
    )

    # The theme must be set BEFORE drawing to affect this figure. A single call
    # is used because a later bare sns.set(font=...) would reset the style.
    sns.set(style='whitegrid', font='sans serif')
    plt.figure(figsize=(13, 10))
    # hue + legend=False is the replacement seaborn recommends for passing
    # `palette` without `hue` (deprecated, removal announced for v0.14).
    sns.boxenplot(
        data=combined_df,
        y='dataset',
        x='mean_average_precision',
        hue='dataset',
        palette='Set2',
        legend=False,
    )
    plt.xlabel('Mean Average Precision', fontsize=18)
    plt.ylabel('', fontsize=18)
    plt.yticks(fontsize=18)
    plt.show()
    return plt, output_dict
def cell_count_norm_colorscheme_early(dict1, dict2):
    """Plot per-compound mAP against MoA, with marker size from cell counts.

    Parameters
    ----------
    dict1 : dict
        Maps a CSV path to a dataset label. Each file must provide
        'Metadata_Count_Cells' and 'Metadata_BRD ID'; for paths containing
        'phasefeatures', a 'Metadata_broad_sample.1' column is renamed to
        'Metadata_BRD ID' first.
    dict2 : dict
        Maps a CSV path to a dataset label. Each file must provide
        'Metadata_broad_sample' and 'mean_average_precision'.

    Returns
    -------
    tuple
        ``(plot, table)``: the plotly figure and the merged DataFrame behind it.

    Notes
    -----
    Relies on the module-level ``moa_metadata`` table and ``BRD_ID`` helper.
    The figure is drawn for five fixed dataset labels (see ``trace_styles``).
    """

    def _inner_join_all(frames, key):
        # Successive default (inner) merges on `key`; None when `frames` is empty.
        joined = None
        for frame in frames:
            joined = frame.copy() if joined is None else pd.merge(joined, frame, on=key)
        return joined

    # --- mean cell count per compound, one table per dataset ---------------
    counts_by_label = {}
    for path, label in dict1.items():
        profile = pd.read_csv(path)
        if 'phasefeatures' in path:
            profile = profile.rename(columns={'Metadata_broad_sample.1': 'Metadata_BRD ID'})
        count_col = 'Metadata_Count_Cells' + label
        counts = profile.groupby('Metadata_BRD ID')['Metadata_Count_Cells'].mean().to_frame()
        counts = counts.reset_index().rename(
            columns={'Metadata_BRD ID': 'BRD ID', 'Metadata_Count_Cells': count_col}
        )
        # Scaled copy of the mean count, used as the marker size below.
        counts[count_col + '_norm'] = counts[count_col] / 100
        counts_by_label[label] = counts
    cell_count_df = _inner_join_all(counts_by_label.values(), 'BRD ID')

    # --- mAP per compound, one table per dataset ---------------------------
    map_by_label = {}
    for path, label in dict2.items():
        map_col = 'mean_average_precision' + label
        scores = pd.read_csv(path).rename(columns={'mean_average_precision': map_col})
        map_by_label[label] = scores[['Metadata_broad_sample', map_col]]
    combined_df = _inner_join_all(map_by_label.values(), 'Metadata_broad_sample')

    # --- attach annotations and cell counts --------------------------------
    combined_df['BRD ID'] = combined_df['Metadata_broad_sample'].map(BRD_ID)
    annotated = pd.merge(combined_df, moa_metadata, on='BRD ID')
    table = pd.merge(annotated, cell_count_df, on='BRD ID')

    # (dataset label, index into the Set2 palette, marker symbol)
    trace_styles = (
        ('Saguaro+DRAQ7_4h', 2, 'triangle-up'),
        ('Saguaro+DRAQ7_24h', 2, 'square'),
        ('Saguaro_4h', 3, 'triangle-up'),
        ('Saguaro_24h', 3, 'square'),
        ('Saguaro_48h', 3, 'pentagon'),
    )
    table = table.rename(
        columns={'mean_average_precision' + label: label for label, _, _ in trace_styles}
    )

    # --- figure -------------------------------------------------------------
    plot = go.Figure()
    for label, palette_idx, symbol in trace_styles:
        plot.add_trace(
            go.Scatter(
                x=table['MoA'],
                y=table[label],
                hovertext=table['Common Name'],
                mode='markers',
                marker=dict(color=px.colors.qualitative.Set2[palette_idx], symbol=symbol),
                name=label,
                marker_opacity=0.5,
                marker_size=table['Metadata_Count_Cells' + label + '_norm'],
            )
        )
    plot.update_layout(
        height=1000,
        width=2000,
        font_family='sans serif',
        font=dict(size=18, color='Black'),
        boxmode='group',
        yaxis_title='Mean average precision',
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01, itemsizing="constant"),
        plot_bgcolor='rgba(0,0,0,0)',
    )
    plot.update_xaxes(tickangle=90, categoryorder='total ascending', linecolor='black')
    plot.update_yaxes(linecolor='black')
    plot.update_traces(marker_sizemin=10, marker_sizemode='area', marker_sizeref=1)
    plot.show('notebook')
    return plot, table
# Batch 6:
# Batch 8:
# Profile CSVs to score, keyed by absolute path, each mapped to the dataset
# label used in the mAP output and on the plots. The paths are specific to one
# Windows machine.
dict_to_load_feature_selected_early_timepoint_48h = {'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_normalized_negcon_wo_phasefeatures.csv':'Saguaro+DRAQ7_4h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_24h_normalized_negcon_wo_phasefeatures.csv':'Saguaro+DRAQ7_24h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_4h_normalized_negcon_wo_phasefeatures.csv':'Saguaro_4h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_24h_normalized_negcon_wo_phasefeatures.csv':'Saguaro_24h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\saguaro_48h_fixedcell_data_normalized_negcon_wo_phasefeatures_wo_DNAfeatures.csv':'Saguaro_48h'
}
# mAP computed with mAP='Control', i.e. negative pairs differ in
# Metadata_control_type. Returns the pyplot module and a dict of label -> mAP table.
mAP_earlytimepoint_plot_48h, mAP_earlytimepoint_dict_48h = copairs_batches_earlytimepoint(dict_to_load_feature_selected_early_timepoint_48h, mAP='Control')
# Notebook output: C:\Users\ssivagur\AppData\Local\Temp\ipykernel_35644\2069034308.py:56: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.
# '..._Result_NegconNorm_mAP_wrt_Control.csv' files, keyed by path and mapped
# to dataset labels. Passed below as `dict1`, where the per-compound mean of
# 'Metadata_Count_Cells' is computed from each file.
earlytime_point_result_csv = {'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro+DRAQ7_4h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_24h_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro+DRAQ7_24h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_4h_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro_4h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_24h_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro_24h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\saguaro_48h_fixedcell_data_normalized_negcon_wo_phasefeatures_wo_DNAfeatures_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro_48h'
}
# Aggregated mAP files (Control), keyed by path and mapped to the same labels.
# Passed below as `dict2`, the source of the 'mean_average_precision' column.
# NOTE(review): the 48h file name uses 'Aggregate_result' (lowercase r) while
# the others use 'Aggregate_Result' -- confirm this matches the files on disk.
earlytime_point_aggregate_csv = {'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro+DRAQ7_4h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_24h_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro+DRAQ7_24h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_4h_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro_4h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_24h_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro_24h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\saguaro_48h_fixedcell_data_normalized_negcon_wo_phasefeatures_wo_DNAfeatures_Aggregate_result_NegconNorm_mAP_wrt_Control.csv':'Saguaro_48h'
}
# mAP-vs-MoA scatter for the Control comparison; also returns the merged table.
moA_control_earlytime_point_plot, df_earlytime_point_controls = cell_count_norm_colorscheme_early(earlytime_point_result_csv,earlytime_point_aggregate_csv)
# Same profiles scored again with mAP='Treatment', i.e. negative pairs differ
# in Metadata_broad_sample instead of Metadata_control_type.
mAP_earlytimepoint_trmt_plot_48h, mAP_earlytimepoint_trmt_dict_48h = copairs_batches_earlytimepoint(dict_to_load_feature_selected_early_timepoint_48h, mAP='Treatment')
# Notebook output: C:\Users\ssivagur\AppData\Local\Temp\ipykernel_35644\2069034308.py:56: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.
# '..._Result_NegconNorm_mAP_wrt_Treatment.csv' files, keyed by path and mapped
# to dataset labels. Passed below as `dict1` (source of the cell counts).
earlytime_point_result_csv_trmt = {'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro+DRAQ7_4h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_24h_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro+DRAQ7_24h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_4h_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro_4h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_24h_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro_24h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\saguaro_48h_fixedcell_data_normalized_negcon_wo_phasefeatures_wo_DNAfeatures_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro_48h'
}
# Aggregated mAP files (Treatment), keyed by path and mapped to the same labels.
# Passed below as `dict2` (source of the 'mean_average_precision' column).
# NOTE(review): the 48h file name uses 'Aggregate_result' (lowercase r) while
# the others use 'Aggregate_Result' -- confirm this matches the files on disk.
earlytime_point_aggregate_csv_trmt = {'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro+DRAQ7_4h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_24h_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro+DRAQ7_24h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_4h_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro_4h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_24h_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro_24h',
'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\saguaro_48h_fixedcell_data_normalized_negcon_wo_phasefeatures_wo_DNAfeatures_Aggregate_result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro_48h'
}
# mAP-vs-MoA scatter for the Treatment comparison; also returns the merged table.
moA_treatment_earlytime_point_plot, df_earlytime_point_treatment = cell_count_norm_colorscheme_early(earlytime_point_result_csv_trmt,earlytime_point_aggregate_csv_trmt)